/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jasper.compiler;

/**
 * Converts a JSP attribute value into the unquoted equivalent. The attribute
 * may contain EL expressions, in which case care needs to be taken to avoid any
 * ambiguities. For example, consider the attribute values "${1+1}" and
 * "\${1+1}". After unquoting, both appear as "${1+1}" but the first should
 * evaluate to "2" and the second to "${1+1}". Literal \, $ and # need special
 * treatment to ensure there is no ambiguity. The JSP attribute unquoting covers
 * \\, \", \', \$, \#, %\&gt;, &lt;\%, &amp;apos; and &amp;quot;
 */
public class AttributeParser {

	/* System property that controls if the strict quoting rules are applied. */
	private static final boolean STRICT_QUOTE_ESCAPING = Boolean.parseBoolean(
			System.getProperty(
					"org.apache.jasper.compiler.Parser.STRICT_QUOTE_ESCAPING",
					"true"));

	/**
	 * Parses the provided input String as a JSP attribute and returns an
	 * unquoted value. Strict quote escaping is applied according to the
	 * <code>org.apache.jasper.compiler.Parser.STRICT_QUOTE_ESCAPING</code>
	 * system property (default <code>true</code>).
	 * 
	 * @param input
	 *            The input. Must not be <code>null</code>.
	 * @param quote
	 *            The quote character for the attribute or 0 for scripting
	 *            expressions.
	 * @param isELIgnored
	 *            Is expression language being ignored on the page where the JSP
	 *            attribute is defined.
	 * @param isDeferredSyntaxAllowedAsLiteral
	 *            Are deferred expressions treated as literals?
	 * @return An unquoted JSP attribute that, if it contains expression
	 *         language can be safely passed to the EL processor without fear of
	 *         ambiguity.
	 * @throws IllegalArgumentException
	 *             If strict quote escaping is enabled and the input contains
	 *             the quote character without it being escaped.
	 */
	public static String getUnquoted(String input, char quote,
			boolean isELIgnored, boolean isDeferredSyntaxAllowedAsLiteral) {
		return (new AttributeParser(input, quote, isELIgnored,
				isDeferredSyntaxAllowedAsLiteral, STRICT_QUOTE_ESCAPING))
				.getUnquoted();
	}

	/**
	 * Provided solely for unit test purposes and allows per call overriding of
	 * the STRICT_QUOTE_ESCAPING system property.
	 * 
	 * @param input
	 *            The input. Must not be <code>null</code>.
	 * @param quote
	 *            The quote character for the attribute or 0 for scripting
	 *            expressions.
	 * @param isELIgnored
	 *            Is expression language being ignored on the page where the JSP
	 *            attribute is defined.
	 * @param isDeferredSyntaxAllowedAsLiteral
	 *            Are deferred expressions treated as literals?
	 * @param strict
	 *            The value to use for STRICT_QUOTE_ESCAPING.
	 * @return An unquoted JSP attribute that, if it contains expression
	 *         language can be safely passed to the EL processor without fear of
	 *         ambiguity.
	 * @throws IllegalArgumentException
	 *             If <code>strict</code> is <code>true</code> and the input
	 *             contains the quote character without it being escaped.
	 */
	protected static String getUnquoted(String input, char quote,
			boolean isELIgnored, boolean isDeferredSyntaxAllowedAsLiteral,
			boolean strict) {
		return (new AttributeParser(input, quote, isELIgnored,
				isDeferredSyntaxAllowedAsLiteral, strict)).getUnquoted();
	}

	/* The quoted input string. */
	private final String input;

	/* The quote used for the attribute - 0 for scripting expressions. */
	private final char quote;

	/*
	 * Is expression language being ignored - affects unquoting. \$ and \# are
	 * treated as literals rather than quoted values.
	 */
	private final boolean isELIgnored;

	/* Are deferred expression treated as literals */
	private final boolean isDeferredSyntaxAllowedAsLiteral;

	/*
	 * Whether an unescaped occurrence of the attribute quote character is
	 * rejected. Normally the value of STRICT_QUOTE_ESCAPING; unit tests may
	 * override it per call.
	 */
	private final boolean strict;

	/* The type ($ or #) of expression. Literals have a type of 0. */
	private final char type;

	/* The length of the quoted input string. */
	private final int size;

	/* Tracks the current position of the parser in the input String. */
	private int i = 0;

	/* Indicates if the last character returned by nextChar() was escaped. */
	private boolean lastChEscaped = false;

	/* The unquoted result. */
	private final StringBuilder result;

	/**
	 * Creates a single use parser for one attribute value. Instances are only
	 * created by the static getUnquoted() methods.
	 * 
	 * @param input
	 *            The quoted attribute value. Must not be <code>null</code>.
	 * @param quote
	 *            The quote character for the attribute or 0 for scripting
	 *            expressions.
	 * @param isELIgnored
	 *            Is expression language being ignored?
	 * @param isDeferredSyntaxAllowedAsLiteral
	 *            Are deferred expressions treated as literals?
	 * @param strict
	 *            Should an unescaped quote character be rejected?
	 */
	private AttributeParser(String input, char quote, boolean isELIgnored,
			boolean isDeferredSyntaxAllowedAsLiteral, boolean strict) {
		this.input = input;
		this.quote = quote;
		this.isELIgnored = isELIgnored;
		this.isDeferredSyntaxAllowedAsLiteral = isDeferredSyntaxAllowedAsLiteral;
		this.strict = strict;
		// getType() reads isELIgnored and isDeferredSyntaxAllowedAsLiteral so
		// it must be called after those fields have been assigned.
		this.type = getType(input);
		this.size = input.length();
		this.result = new StringBuilder(size);
	}

	/*
	 * Work through input looking for literals and expressions until the input
	 * has all been read. Each pass consumes one run of literal text followed by
	 * one expression; either may be empty.
	 */
	private String getUnquoted() {
		while (i < size) {
			parseLiteral();
			parseEL();
		}
		return result.toString();
	}

	/*
	 * Consumes literal (non-EL) text until the input is exhausted or the start
	 * of an expression (the type character followed by '{') is found. Each
	 * unquoted character is appended to the result. When EL is not being
	 * ignored and the attribute contains an expression (type is not 0) the
	 * following literals are converted for EL processing:
	 *   \          -> type{'\\'}
	 *   escaped $  -> type{'$'}
	 *   escaped #  -> type{'#'}
	 * When EL is not being ignored but there is no expression (type is 0) they
	 * are written as \, \$ and \# respectively.
	 * Note all the examples above *do not* include the escaping required to use
	 * the values in Java code.
	 */
	private void parseLiteral() {
		boolean foundEL = false;
		while (i < size && !foundEL) {
			char ch = nextChar();
			if (!isELIgnored && ch == '\\') {
				if (type == 0) {
					result.append("\\");
				} else {
					result.append(type);
					result.append("{'\\\\'}");
				}
			} else if (!isELIgnored && ch == '$' && lastChEscaped) {
				if (type == 0) {
					result.append("\\$");
				} else {
					result.append(type);
					result.append("{'$'}");
				}
			} else if (!isELIgnored && ch == '#' && lastChEscaped) {
				// Note if isDeferredSyntaxAllowedAsLiteral==true, \# will
				// not be treated as an escape
				if (type == 0) {
					result.append("\\#");
				} else {
					result.append(type);
					result.append("{'#'}");
				}
			} else if (ch == type) {
				// Possible start of an expression - only if followed by '{'
				if (i < size && input.charAt(i) == '{') {
					foundEL = true;
					// Move back to start of EL so parseEL() sees the type
					// character as well
					i--;
				} else {
					result.append(ch);
				}
			} else {
				result.append(ch);
			}
		}
	}

	/*
	 * For EL need to unquote everything but no need to convert anything. The EL
	 * is terminated by '}'. The only other valid location for '}' is inside a
	 * StringLiteral. The literals are delimited by '\'' or '\"'. The only other
	 * valid location for '\'' or '\"' is also inside a StringLiteral. A quote
	 * character inside a StringLiteral must be escaped if the same quote
	 * character is used to delimit the StringLiteral.
	 */
	private void parseEL() {
		boolean endEL = false;
		boolean insideLiteral = false;
		char literalQuote = 0;
		while (i < size && !endEL) {
			char ch = nextChar();
			if (ch == '\'' || ch == '\"') {
				if (insideLiteral) {
					// Only the quote that opened the literal can close it
					if (literalQuote == ch) {
						insideLiteral = false;
					}
				} else {
					insideLiteral = true;
					literalQuote = ch;
				}
				result.append(ch);
			} else if (ch == '\\') {
				result.append(ch);
				// Inside a StringLiteral a backslash escapes the following
				// character. Consume that character here so that an escaped
				// quote is not mistaken for the end of the literal. The
				// bounds check guards against a trailing backslash.
				if (insideLiteral && i < size) {
					ch = nextChar();
					result.append(ch);
				}
			} else if (ch == '}') {
				// A '}' inside a StringLiteral does not terminate the EL
				if (!insideLiteral) {
					endEL = true;
				}
				result.append(ch);
			} else {
				result.append(ch);
			}
		}
	}

	/*
	 * Returns the next unquoted character, advances the parse position past
	 * the characters consumed and sets the lastChEscaped flag to indicate if
	 * the character was quoted with a backslash or not.
	 *   &apos; is always unquoted to '
	 *   &quot; is always unquoted to "
	 *   \" is always unquoted to "
	 *   \' is always unquoted to '
	 *   \\ is always unquoted to \
	 *   \$ is unquoted to $ if EL is not being ignored
	 *   \# is unquoted to # if EL is not being ignored and deferred syntax
	 *      is not allowed as a literal
	 *   <\% is always unquoted to <%
	 *   %\> is always unquoted to %>
	 * Throws IllegalArgumentException if strict quoting is enabled and the
	 * attribute quote character is found unescaped.
	 */
	private char nextChar() {
		lastChEscaped = false;
		char ch = input.charAt(i);

		if (ch == '&') {
			// startsWith() returns false if the entity would run past the
			// end of the input
			if (input.startsWith("&apos;", i)) {
				ch = '\'';
				i += 6;
			} else if (input.startsWith("&quot;", i)) {
				ch = '\"';
				i += 6;
			} else {
				++i;
			}
		} else if (ch == '\\' && i + 1 < size) {
			ch = input.charAt(i + 1);
			if (ch == '\\'
					|| ch == '\"'
					|| ch == '\''
					|| (!isELIgnored && (ch == '$' || (!isDeferredSyntaxAllowedAsLiteral && ch == '#')))) {
				i += 2;
				lastChEscaped = true;
			} else {
				// Not a recognised escape - the backslash is a literal
				ch = '\\';
				++i;
			}
		} else if (input.startsWith("<\\%", i)) {
			// Note this is a hack since nextChar only returns a single char
			// It is safe since <% does not require special treatment for EL
			// or for literals
			result.append('<');
			i += 3;
			return '%';
		} else if (input.startsWith("%\\>", i)) {
			// Note this is a hack since nextChar only returns a single char
			// It is safe since %> does not require special treatment for EL
			// or for literals
			result.append('%');
			i += 3;
			return '>';
		} else if (ch == quote && strict) {
			String msg = Localizer.getMessage("jsp.error.attribute.noescape",
					input, "" + quote);
			throw new IllegalArgumentException(msg);
		} else {
			++i;
		}

		return ch;
	}

	/*
	 * Determines the type of expression by looking for the first unquoted ${ or
	 * #{. Returns 0 if the value is null, if EL is being ignored or if no
	 * expression is found. #{ is not considered when deferred syntax is allowed
	 * as a literal.
	 */
	private char getType(String value) {
		if (value == null) {
			return 0;
		}

		if (isELIgnored) {
			return 0;
		}

		int j = 0;
		int len = value.length();
		char current;

		while (j < len) {
			current = value.charAt(j);
			if (current == '\\') {
				// Escape character - skip a character
				j++;
			} else if (current == '#' && !isDeferredSyntaxAllowedAsLiteral) {
				if (j < (len - 1) && value.charAt(j + 1) == '{') {
					return '#';
				}
			} else if (current == '$') {
				if (j < (len - 1) && value.charAt(j + 1) == '{') {
					return '$';
				}
			}
			j++;
		}
		return 0;
	}
}
